import ast

import requests
import xlwt
from bs4 import BeautifulSoup


class Result:
    """One scraped listing; the scraping loop fills in the fields.

    Attributes (all ``None`` until assigned):
        maidian: title text of the listing's selling-point link.
        louceng: house-info text (written under the '楼层' column).
        biaoqian: list of tag strings attached to the listing.
        zongjia: total price text.
        danjia: unit price text.
    """

    def __init__(self):
        # Declare every field up front so a partially filled record can still
        # be inspected or printed without raising AttributeError.
        self.maidian = None
        self.louceng = None
        self.biaoqian = None
        self.zongjia = None
        self.danjia = None

    def __repr__(self):
        """Return ``Result(field=value, ...)`` so printing a record is readable."""
        fields = ', '.join(
            f"{name}={value!r}" for name, value in vars(self).items()
        )
        return f"{type(self).__name__}({fields})"


# Entry page; its area filter links are scraped below and every per-area page
# URL is built by appending to this prefix.
url = 'https://cd.ke.com/ershoufang'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}

# A timeout keeps a stalled connection from hanging the script forever, and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
response1 = requests.get(url, headers=headers, timeout=10)
response1.raise_for_status()
soup1 = BeautifulSoup(response1.content, 'html.parser')

# Work out which areas exist from the filter links on the entry page.
position = soup1.find('div', class_='position')
if position is None:
    # Without this check the failure would be an opaque AttributeError on None.
    raise RuntimeError(f"area filter block not found on {url}")
areas = position.findAll('a', class_='CLICKDATA')

# All areas: display name plus the URL path segment that follows '/ershoufang'.
# Links whose href lacks that marker are skipped instead of raising IndexError.
allArea = [
    {'name': link.text, 'text': link['href'].split('/ershoufang')[1]}
    for link in areas
    if '/ershoufang' in link.get('href', '')
]
def _parse_listing(li):
    """Build a ``Result`` from one ``<li class="clear">`` node.

    Returns ``None`` when the node lacks any element the scraper reads (the
    selling-point link or its title, the house info, the total price or the
    unit price), so the caller can skip it instead of crashing on ``None``.
    """
    title_link = li.find('a', class_='maidian-detail')
    info_div = li.find('div', class_='houseInfo')
    total_div = li.find('div', class_='totalPrice totalPrice2')
    unit_div = li.find('div', class_='unitPrice')
    if title_link is None or info_div is None or total_div is None or unit_div is None:
        return None

    title = title_link.get('title')
    total_span = total_div.find('span')
    unit_span = unit_div.find('span')
    if title is None or total_span is None or unit_span is None:
        return None

    # Tags are optional: a listing without a tag container gets an empty list.
    tag_div = li.find('div', class_='tag')
    tags = [span.text for span in tag_div.findAll('span')] if tag_div is not None else []

    record = Result()
    record.maidian = title
    record.louceng = info_div.text.replace(' ', '').replace('\n', '')
    record.biaoqian = tags
    record.zongjia = f"{total_span.text.replace(' ', '')}万"
    record.danjia = unit_span.text
    return record


def _has_next_page(soup):
    """Return True when the pager in *soup* reports pages after the current one.

    A missing pager, or pager data that cannot be parsed, counts as "no more
    pages" so the scrape of that area ends cleanly.
    """
    pager = soup.find('div', class_='page-box house-lst-page-box')
    if pager is None or not pager.get('page-data'):
        return False
    try:
        # SECURITY: this attribute comes from remote HTML. The original code
        # passed it to eval(), which would execute arbitrary code embedded in
        # the page. literal_eval only accepts plain literals.
        page_data = ast.literal_eval(pager['page-data'])
        return page_data['curPage'] < page_data['totalPage']
    except (ValueError, SyntaxError, KeyError, TypeError):
        print('无法解析分页信息，停止翻页。')
        return False


COLUMN_TITLES = ('卖点', '楼层', '标签', '单价', '总价')

wb = xlwt.Workbook()
for item in allArea:
    # One worksheet per area, with a header row and uniformly wide columns
    # (the width is now applied to every column, including the last one).
    sh = wb.add_sheet(item['name'])
    for col, title in enumerate(COLUMN_TITLES):
        sh.write(0, col, title)
        sh.col(col).width = 256 * 40

    outResult = []
    pageNo = 1  # current page number
    haveNext = True
    while haveNext:
        print(f"正在爬取 {item['name']} 第{pageNo}页 的数据...")
        realUrl = f"{url}{item['text']}pg{pageNo}"
        try:
            # The timeout prevents an indefinite hang on a stalled connection.
            response = requests.get(realUrl, headers=headers, timeout=10)
        except requests.RequestException as exc:
            # Keep what has been collected so far; the workbook is only saved
            # at the very end, so an uncaught error here would lose everything.
            print(f"请求失败，跳过 {item['name']} 的剩余页面: {exc}")
            break
        soup = BeautifulSoup(response.content, 'html.parser')
        for li in soup.findAll('li', class_='clear'):
            result = _parse_listing(li)
            if result is None:
                # Not a complete listing node; nothing to record.
                continue
            print(result)
            outResult.append(result)
        # Check whether there is another page after this one.
        haveNext = _has_next_page(soup)
        pageNo += 1

    for row, data in enumerate(outResult, start=1):
        sh.write(row, 0, data.maidian)
        sh.write(row, 1, data.louceng)
        # Write the tags as one separated string rather than handing xlwt a
        # raw list, so the cell content stays readable.
        sh.write(row, 2, ','.join(data.biaoqian))
        sh.write(row, 3, data.danjia)
        sh.write(row, 4, data.zongjia)

wb.save('test.xls')
print('处理完毕。。')
